import numpy as np
import pandas as pd
import seaborn as sns
% matplotlib inline
# Función para descarga de ficheros desde URL
import os
import urllib.error
import urllib.request
def dl_data(url, output):
    """Download `url` and save the raw bytes to the local path `output`.

    Creates the parent directory of `output` if it does not exist.
    Network/URL failures are reported to stdout and swallowed (the
    notebook continues with whatever data is already on disk).
    """
    try:
        # Use a context manager so the connection is always closed
        # (the original leaked the response object).
        with urllib.request.urlopen(url) as response:
            print("Downloading " + url)
            os.makedirs(os.path.dirname(output), exist_ok=True)
            with open(output, "wb") as local_file:
                local_file.write(response.read())
    except urllib.error.URLError:
        # BUG FIX: the original caught a bare `URLError`, which is not a
        # defined name here and would raise NameError on any failure.
        print("Error", url)
# Download the Seattle cultural spaces inventory
output1 = './data/Seattle_Cultural_Space_Inventory.csv'
url = ("https://data.seattle.gov/api/views/vsxr-aydq/"
       "rows.csv?accessType=DOWNLOAD")
dl_data(url, output1)
import re

# Boundary 1: any character followed by a capitalized word ("lCase" -> "l_Case").
# Boundary 2: a lowercase letter or digit followed by an uppercase letter.
_CAMEL_BOUNDARY_1 = re.compile(r'(.)([A-Z][a-z]+)')
_CAMEL_BOUNDARY_2 = re.compile(r'([a-z0-9])([A-Z])')


def camelToSnake(s):
    """Convert a camelCase/PascalCase string to snake_case."""
    step1 = _CAMEL_BOUNDARY_1.sub(r'\1_\2', s)
    step2 = _CAMEL_BOUNDARY_2.sub(r'\1_\2', step1)
    return step2.lower()
# Load the downloaded inventory into a dataframe and normalize the headers
dfpois = pd.read_csv('./data/Seattle_Cultural_Space_Inventory.csv')
dfpois.columns = [camelToSnake(col) for col in dfpois.columns]
dfpois.head()
dfpois.shape
dfpois.isnull().sum()
dfpois.columns
# Fix the names of some columns by hand (English labels for all 35 columns).
# NOTE(review): several names deliberately keep a trailing space
# ('stage_&_theater_seats ', 'rental_space ',
# 'control_index(5=very_in_control, 1=very_out_of_control) ') — presumably
# mirroring the raw headers; confirm before "cleaning" them, since later
# code selects columns by these exact strings.
dfpois.columns = ['name', 'phone', 'url', 'square_feet_total', 'neighborhood',
'organization_type', 'dominant_discipline', 'year_of_occupation',
'rent_vs_own', 'age_of_current_building', 'length_of_lease(date)',
'year_organization_founded', 'number_of_past_facilities',
'stages_and_theaters', 'stage_&_theater_seats ', 'gallery_space',
'gallery_square_feet', 'ada_compliant', 'available_parking',
'street_presence', 'rental_space ', 'alcohol_sales',
'organization_mission', 'funded_by_a&c', 'funded_by_4_culture',
'stability_index(5=very_stable,1=very_uncertain)',
'control_index(5=very_in_control, 1=very_out_of_control) ',
'constituency_over_50%_one_race',
'specific_demographics_and_community', 'organization_leadership',
'organization_artists', 'closed_date', 'closed?', 'address',
'location']
dfpois.dominant_discipline.value_counts()
# Keep only venues that are not closed (closed? == 0)
dfpois = dfpois.loc[dfpois['closed?'] == 0.0]
# Drop categories that are too common or of minor interest.
# NOTE: 'Arts/Cultrual Administration or Advocacy' (typo) and 'Multi-use '
# (trailing space) are spelled exactly as they appear in the raw data.
_excluded_disciplines = [
    'Service/Supply',
    'Arts/Cultural Training or Education',
    'Arts/Cultrual Administration or Advocacy',
    'Multi-use',
    'Multi-use ',
    'Preservation',
    'Community Center',
    'Heritage',
]
# One isin() filter replaces eight chained != filters; NaN values are kept
# by both forms and are removed by the notnull() filter below.
dfpois = dfpois.loc[~dfpois.dominant_discipline.isin(_excluded_disciplines)]
# And drop records with a null dominant_discipline
dfpois = dfpois.loc[dfpois.dominant_discipline.notnull()]
dfpois.shape
# Check that 1196 - 911 = 285 records were removed.
# Keep only the location column and split "(lat, lon)" into numeric columns.
loc_split = dfpois.location.str.strip('()').str.split(', ', expand=True)
loc_split = loc_split.rename(columns={0: 'latitude', 1: 'longitude'})
dfpois = loc_split.dropna().astype(float)
dfpois.head()
# Distance between two points accounting for the curvature of the earth
# (great-circle, not Euclidean).
from math import radians, cos, sin, asin, sqrt, acos


def haversine(lon1, lat1, lon2, lat2):
    """Great-circle distance in kilometers between two points.

    Arguments are decimal degrees, longitude first.
    """
    EARTH_RADIUS_KM = 6373
    rlon1, rlat1, rlon2, rlat2 = (radians(v) for v in (lon1, lat1, lon2, lat2))
    half_dlat = (rlat2 - rlat1) / 2
    half_dlon = (rlon2 - rlon1) / 2
    # Haversine formula
    chord = sin(half_dlat) ** 2 + cos(rlat1) * cos(rlat2) * sin(half_dlon) ** 2
    return 2 * EARTH_RADIUS_KM * asin(sqrt(chord))
# Load the parking-meter coordinates; later code reads element_key,
# latitude and longitude columns from this frame.
coord = pd.read_csv('./data/Coord_EK.csv')
coord.head()
# One-row frame with the first POI, used for the distance-band trial below.
# NOTE(review): assumes index label 0 survived the earlier filtering —
# confirm, otherwise .loc[0] raises KeyError.
sub = dfpois.loc[0,:].astype(float).to_frame().transpose()
# Add columns to the coord frame marking whether an event is near each
# element_key. Once an element_key has one nearby point (<= 75-200 meters)
# no other POI needs to be checked (break).
_BANDS = ((0.075, 'poi_75m'), (0.1, 'poi_100m'),
          (0.15, 'poi_150m'), (0.2, 'poi_200m'))


def _distance_band(dist_km):
    # Column for the first (smallest) band containing dist_km, else None.
    for threshold, column in _BANDS:
        if dist_km <= threshold:
            return column
    return None


prueba_coord = coord.copy()
for _, band_col in _BANDS:
    prueba_coord[band_col] = 0
for meter_idx, meter in prueba_coord.iterrows():
    for _, poi in sub.iterrows():
        band = _distance_band(
            haversine(meter.longitude, meter.latitude, poi.longitude, poi.latitude))
        if band is not None:
            prueba_coord.at[meter_idx, band] = 1
            break
import folium
this_map = folium.Map(prefer_canvas=True, max_bounds=False)


def plotDot(df):
    """Draw one dataframe row as a circle marker on the module-level map.

    `df` is a row (Series) exposing `latitude` and `longitude`; the fill
    color is taken from the module-level `usedColor` variable, which the
    caller sets before each `apply(plotDot, axis=1)` pass.
    """
    # The original made a per-row `df.copy()` that was never mutated —
    # pure dead work inside an apply() hot path, so it is removed.
    folium.CircleMarker(location=[df.latitude, df.longitude],
                        radius=5,
                        fill=True,
                        fill_opacity=0.8,
                        fill_color=usedColor,
                        color='whitesmoke',
                        weight=0.5).add_to(this_map)
# Draw each distance band in its own color; the POI itself goes last, in red.
for usedColor, marker_set in (
        ('purple', prueba_coord.loc[prueba_coord.poi_200m == 1]),
        ('blue', prueba_coord.loc[prueba_coord.poi_150m == 1]),
        ('green', prueba_coord.loc[prueba_coord.poi_100m == 1]),
        ('turquoise', prueba_coord.loc[prueba_coord.poi_75m == 1]),
        ('red', sub),
):
    marker_set.apply(plotDot, axis=1)
# Fit the view to the drawn markers and clamp the panning bounds.
map_bounds = this_map.get_bounds()
this_map.fit_bounds(map_bounds, max_zoom=20)
(this_map.min_lat, this_map.min_lon), (this_map.max_lat, this_map.max_lon) = map_bounds
this_map
El punto de interés aparece señalado en color rojo. Los parquímetros que están a una distancia inferior o igual a 75 metros son los de color turquesa. Los parquímetros que están a una distancia entre 75 y 100 metros se incluyen en color verde. Los parquímetros que están a una distancia entre 100 y 150 metros son los de color azul y los parquímetros que están a una distancia entre 150 y 200 metros se incluyen en color morado.
# Proximity radius in kilometers (75 m)
radio_dist_prox = 0.075
# Flag every meter that has at least one cultural POI within the radius;
# stop scanning POIs for a meter as soon as one is close enough.
coord['poi'] = 0
for meter_idx, meter in coord.iterrows():
    for _, poi in dfpois.iterrows():
        if haversine(meter.longitude, meter.latitude,
                     poi.longitude, poi.latitude) <= radio_dist_prox:
            coord.at[meter_idx, 'poi'] = 1
            break
# Percentage of meters with a cultural POI nearby
coord.element_key.loc[coord.poi == 1].count() / coord.shape[0] * 100
# Map: all meters (dodgerblue), all cultural POIs (green), and the meters
# flagged as having a POI within 75 m (orange, drawn on top).
this_map = folium.Map(prefer_canvas=True, max_bounds=False)
usedColor = 'dodgerblue'
coord.apply(plotDot, axis = 1)
usedColor = 'green'
dfpois.apply(plotDot, axis = 1)
usedColor = 'orange'
coord.loc[coord.poi == 1].apply(plotDot, axis = 1)
# Fit the view to the drawn markers and clamp the panning bounds.
map_bounds = this_map.get_bounds()
this_map.fit_bounds(map_bounds, max_zoom=20)
this_map.max_lat = map_bounds[1][0]
this_map.min_lat = map_bounds[0][0]
this_map.max_lon = map_bounds[1][1]
this_map.min_lon = map_bounds[0][1]
this_map
# Requires the shapely package, installed beforehand with:
# conda install -c scitools/label/archive shapely
from shapely.geometry import Point
from shapely.geometry.multipolygon import MultiPolygon
from shapely import wkt
from shapely.wkt import loads
# Download the baseball fields dataset.
url = "https://data.seattle.gov/api/views/6v75-vrvs/rows.csv?accessType=DOWNLOAD"
output1 = './data/Baseball_Field.csv'
dl_data(url,output1)
# Load the baseball fields dataset and keep only the WKT geometry column.
df_base = pd.read_csv('./data/Baseball_Field.csv')
df_base.head()
df_base = df_base[['the_geom']]
# Use the fully-qualified option key: the bare 'max_colwidth' shorthand
# relies on pandas' prefix matching and raises OptionError if it ever
# becomes ambiguous; 'display.max_colwidth' is always unambiguous.
pd.set_option('display.max_colwidth', 100000)
df_base.head(1)
df_base.shape
# A MULTIPOLYGON consists of several polygons, and each polygon of exterior
# points. For every element_key we check whether at least one polygon vertex
# is within the proximity radius.
coord['baseball'] = 0
mp_lat = []
mp_lon = []
# Collect every exterior vertex ONCE. The original re-parsed every geometry
# and re-appended the full vertex list for every meter row (quadratic work,
# later hidden by drop_duplicates), and its inner loop shadowed `p`.
for _, base_row in df_base.iterrows():
    multi = loads(base_row['the_geom'])
    # .geoms works on both shapely 1.x and 2.x; iterating a MultiPolygon
    # directly with list(multi) was removed in shapely 2.0.
    for polygon in multi.geoms:
        for vertex in polygon.exterior.coords:
            # Coordinates come as (lon, lat[, z]) tuples.
            mp_lat.append(pd.to_numeric(vertex[1]))
            mp_lon.append(pd.to_numeric(vertex[0]))
# Flag meters with at least one vertex within the proximity radius.
for c_index, c_row in coord.iterrows():
    for lat_v, lon_v in zip(mp_lat, mp_lon):
        if haversine(c_row.longitude, c_row.latitude, lon_v, lat_v) <= radio_dist_prox:
            coord.at[c_index, 'baseball'] = 1
            break
# Percentage of meters near a baseball field.
coord.element_key.loc[coord.baseball == 1].count() / coord.shape[0] * 100
# Unique vertices as a dataframe, for plotting below.
mp = pd.DataFrame([mp_lat, mp_lon]).astype(float).transpose()
mp.columns = ['latitude', 'longitude']
mp = mp.drop_duplicates()
mp.shape
# Map: all meters (dodgerblue), baseball-field vertices (green), and the
# meters flagged as near a baseball field (orange, drawn on top).
this_map = folium.Map(prefer_canvas=True, max_bounds=False)
usedColor = 'dodgerblue'
coord.apply(plotDot, axis = 1)
usedColor = 'green'
mp.apply(plotDot, axis = 1)
usedColor = 'orange'
coord.loc[coord.baseball == 1].apply(plotDot, axis = 1)
# Fit the view to the drawn markers and clamp the panning bounds.
map_bounds = this_map.get_bounds()
this_map.fit_bounds(map_bounds, max_zoom=20)
this_map.max_lat = map_bounds[1][0]
this_map.min_lat = map_bounds[0][0]
this_map.max_lon = map_bounds[1][1]
this_map.min_lon = map_bounds[0][1]
this_map
Repetimos el mismo proceso para pistas de tenis
# Repeat the same process for tennis courts
url = "https://data.seattle.gov/api/views/p8fp-a7qi/rows.csv?accessType=DOWNLOAD"
output1 = './data/Tennis_Court_Point.csv'
dl_data(url, output1)
df_tenis = pd.read_csv('./data/Tennis_Court_Point.csv')
df_tenis.head()
# Parse "POINT (lon lat)" geometry strings into float columns
geom_parts = df_tenis.the_geom.str.strip('POINT ()').str.split(' ', expand=True)
df_tenis = geom_parts.rename(columns={0: 'longitude', 1: 'latitude'}).astype(float)
df_tenis.head()
# Flag meters within the proximity radius of any tennis court
coord['tennis'] = 0
for meter_idx, meter in coord.iterrows():
    for _, court in df_tenis.iterrows():
        if haversine(meter.longitude, meter.latitude,
                     court.longitude, court.latitude) <= radio_dist_prox:
            coord.at[meter_idx, 'tennis'] = 1
            break
# Percentage of meters with a tennis court nearby
coord.element_key.loc[coord.tennis == 1].count() / coord.shape[0] * 100
# Map: all meters (dodgerblue), tennis courts (green), and the meters
# flagged as near a tennis court (orange, drawn on top).
this_map = folium.Map(prefer_canvas=True, max_bounds=False)
usedColor = 'dodgerblue'
coord.apply(plotDot, axis = 1)
usedColor = 'green'
df_tenis.apply(plotDot, axis = 1)
usedColor = 'orange'
coord.loc[coord.tennis == 1].apply(plotDot, axis = 1)
# Fit the view to the drawn markers and clamp the panning bounds.
map_bounds = this_map.get_bounds()
this_map.fit_bounds(map_bounds, max_zoom=20)
this_map.max_lat = map_bounds[1][0]
this_map.min_lat = map_bounds[0][0]
this_map.max_lon = map_bounds[1][1]
this_map.min_lon = map_bounds[0][1]
this_map
# Inspect the flagged rows
coord.loc[coord.tennis == 1]
# Repeat the process for swimming pools
url = "https://data.seattle.gov/api/views/ppq2-qxkx/rows.csv?accessType=DOWNLOAD"
output1 = './data/Swimming_Pools.csv'
dl_data(url, output1)
df_sp = pd.read_csv('./data/Swimming_Pools.csv')
df_sp.head()
df_sp = df_sp[['LATITUDE','LONGITUDE']]
df_sp.columns = ['latitude', 'longitude']
# Flag meters within the proximity radius of any swimming pool
coord['swim_pool'] = 0
for meter_idx, meter in coord.iterrows():
    for _, pool in df_sp.iterrows():
        if haversine(meter.longitude, meter.latitude,
                     pool.longitude, pool.latitude) <= radio_dist_prox:
            coord.at[meter_idx, 'swim_pool'] = 1
            break
coord.element_key.loc[coord.swim_pool == 1].count() / coord.shape[0] * 100
# The feature is discarded after inspecting the percentage above
coord = coord.drop(columns=['swim_pool'])
# Repeat the process for basketball courts
url = "https://data.seattle.gov/api/views/dxss-26kb/rows.csv?accessType=DOWNLOAD"
output1 = './data/Basketball_Court_Point.csv'
dl_data(url, output1)
df_bb = pd.read_csv('./data/Basketball_Court_Point.csv')
df_bb.head()
# Parse "POINT (lon lat)" geometry strings into float columns
point_cols = df_bb.the_geom.str.strip('POINT ()').str.split(' ', expand=True)
df_bb = point_cols.rename(columns={0: 'longitude', 1: 'latitude'}).astype(float)
df_bb.head()
# Flag meters within the proximity radius of any basketball court
coord['basket'] = 0
for meter_idx, meter in coord.iterrows():
    for _, court in df_bb.iterrows():
        if haversine(meter.longitude, meter.latitude,
                     court.longitude, court.latitude) <= radio_dist_prox:
            coord.at[meter_idx, 'basket'] = 1
            break
# Percentage of meters with a basketball court nearby
coord.element_key.loc[coord.basket == 1].count() / coord.shape[0] * 100
# Map: all meters (dodgerblue), basketball courts (green), and the meters
# flagged as near a basketball court (orange, drawn on top).
this_map = folium.Map(prefer_canvas=True, max_bounds=False)
usedColor = 'dodgerblue'
coord.apply(plotDot, axis = 1)
usedColor = 'green'
df_bb.apply(plotDot, axis = 1)
usedColor = 'orange'
coord.loc[coord.basket == 1].apply(plotDot, axis = 1)
# Fit the view to the drawn markers and clamp the panning bounds.
map_bounds = this_map.get_bounds()
this_map.fit_bounds(map_bounds, max_zoom=20)
this_map.max_lat = map_bounds[1][0]
this_map.min_lat = map_bounds[0][0]
this_map.max_lon = map_bounds[1][1]
this_map.min_lon = map_bounds[0][1]
this_map
# Repeat the process for soccer fields
url = "https://data.seattle.gov/api/views/vre6-ceji/rows.csv?accessType=DOWNLOAD"
output1 = './data/Soccer_Field.csv'
dl_data(url, output1)
df_soc = pd.read_csv('./data/Soccer_Field.csv')
df_soc.head()
# Parse "POINT (lon lat)" geometry strings into float columns
point_cols = df_soc.the_geom.str.strip('POINT ()').str.split(' ', expand=True)
df_soc = point_cols.rename(columns={0: 'longitude', 1: 'latitude'}).astype(float)
df_soc.head()
# Flag meters within the proximity radius of any soccer field
coord['soccer'] = 0
for meter_idx, meter in coord.iterrows():
    for _, field in df_soc.iterrows():
        if haversine(meter.longitude, meter.latitude,
                     field.longitude, field.latitude) <= radio_dist_prox:
            coord.at[meter_idx, 'soccer'] = 1
            break
# Percentage of meters with a soccer field nearby
coord.element_key.loc[coord.soccer == 1].count() / coord.shape[0] * 100
# Map: all meters (dodgerblue), soccer fields (green), and the meters
# flagged as near a soccer field (orange, drawn on top).
this_map = folium.Map(prefer_canvas=True, max_bounds=False)
usedColor = 'dodgerblue'
coord.apply(plotDot, axis = 1)
usedColor = 'green'
df_soc.apply(plotDot, axis = 1)
usedColor = 'orange'
coord.loc[coord.soccer == 1].apply(plotDot, axis = 1)
# Fit the view to the drawn markers and clamp the panning bounds.
map_bounds = this_map.get_bounds()
this_map.fit_bounds(map_bounds, max_zoom=20)
this_map.max_lat = map_bounds[1][0]
this_map.min_lat = map_bounds[0][0]
this_map.max_lon = map_bounds[1][1]
this_map.min_lon = map_bounds[0][1]
this_map
# Repeat the process for athletics track fields.
url = "https://data.seattle.gov/api/views/y5mt-y5i8/rows.csv?accessType=DOWNLOAD"
output1 = './data/Track_Fields.csv'
dl_data(url,output1)
df_ath = pd.read_csv(os.path.join('./data/Track_Fields.csv'))
df_ath.head()
# Parse "POINT (lon lat)" geometry strings into float columns.
df_ath = df_ath.the_geom.str.strip('POINT ()') \
    .str.split(' ', expand=True) \
    .rename(columns={0:'longitude', 1:'latitude'})
df_ath = df_ath.astype(float)
df_ath.head()
# Flag meters near a track field.
coord['athletism'] = 0
for c_index, c_row in coord.iterrows():
    for df_index, df_row in df_ath.iterrows():
        dist = haversine(c_row.longitude, c_row.latitude, df_row.longitude, df_row.latitude)
        # NOTE(review): 0.25 km here, larger than the radio_dist_prox
        # (0.075) used for every other dataset — presumably intentional
        # for large track facilities; confirm.
        if dist <= 0.25:
            coord.at[c_index,'athletism'] = 1
            break
# Share of meters near a track field; the feature is then discarded.
coord.element_key.loc[coord.athletism == 1].count() / coord.shape[0] * 100
coord = coord.drop(columns=['athletism'])
coord.head()
# Persist the final enriched coordinates.
coord.to_csv('./data/Coord_cult_&_sport.csv', index=False)